import pandas as pd
import numpy as np
import pandas_profiling
from math import *
from IPython.core.display import display, HTML
# Read the first-round training CSV (decoded as GBK) into a DataFrame.
d_train = pd.read_csv(
    '../../data/first_round_training_data.csv',
    encoding='gbk',
)
# Build the full-width profile report for the raw data.
# NOTE(review): profile_report is presumably attached to DataFrame by the
# pandas_profiling import at the top of the file -- confirm.
d_train.profile_report(style=dict(full_width=True))
数据分布过于偏斜,难以直接观察分布,因此先取对数再重新生成报告
# Build a log-transformed version of the training data and profile it.
#
# The frame is copied first: a plain `d_train_log = d_train` only creates a
# second name for the same object, so the column assignment below would also
# overwrite the raw values in d_train (which later code still reads).
d_train_log = d_train.copy()
log_columns = (
    ["Parameter" + str(i) for i in range(1, 11)]
    + ["Attribute" + str(i) for i in range(1, 11)]
)
# Natural log of every selected column in one vectorized call. Unlike
# math.log, np.log does not raise on non-positive input; it produces
# -inf / NaN (with a RuntimeWarning) instead.
d_train_log[log_columns] = np.log(d_train[log_columns])
d_train_log.profile_report(style={'full_width':True})
由于需要加载 `<script>` 脚本,操作面板可能需要较长时间才能显示
# Render the training data in a Facets Dive element.
# Sprite size fed to the sprite-image-width / sprite-image-height attributes:
# 32 when the frame has more than 50000 rows, 64 otherwise.
if len(d_train.index) > 50000:
    sprite_size = 32
else:
    sprite_size = 64
# Serialize the frame with orient='records' for use as the element's data.
jsonstr = d_train.to_json(orient='records')
# HTML fragment: loads the webcomponents-lite script, imports the
# facets-jupyter page from /nbextensions, declares the <facets-dive> element
# and assigns the JSON payload to it. {sprite_size} and {jsonstr} are the
# str.format placeholders filled in below.
HTML_TEMPLATE = """
<script src="https://cdnjs.cloudflare.com/ajax/libs/webcomponentsjs/1.3.3/webcomponents-lite.js"></script>
<link rel="import" href="/nbextensions/facets-dist/facets-jupyter.html">
<facets-dive sprite-image-width="{sprite_size}" sprite-image-height="{sprite_size}" id="elem" height="600"></facets-dive>
<script>
document.querySelector("#elem").data = {jsonstr};
</script>"""
# Substitute both placeholders, then hand the finished markup to IPython.
html = HTML_TEMPLATE.format(sprite_size=sprite_size, jsonstr=jsonstr)
display(HTML(html))